{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensorflow version:2.3.1\n"
     ]
    }
   ],
   "source": [
    "print('tensorflow version:{}'.format(tf.__version__))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 建立dataset，使用一维列表建立\n",
    "dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<TensorSliceDataset shapes: (), types: tf.int32>"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset #shape为每一个组件的shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor(1, shape=(), dtype=int32)\n",
      "tf.Tensor(2, shape=(), dtype=int32)\n",
      "tf.Tensor(3, shape=(), dtype=int32)\n",
      "tf.Tensor(4, shape=(), dtype=int32)\n",
      "tf.Tensor(5, shape=(), dtype=int32)\n",
      "tf.Tensor(6, shape=(), dtype=int32)\n",
      "tf.Tensor(7, shape=(), dtype=int32)\n"
     ]
    }
   ],
   "source": [
    "# 每个数据都是一个组件，自动转换成为Tensor\n",
    "for ele in dataset:\n",
    "    print(ele)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n"
     ]
    }
   ],
   "source": [
    "# 将数据转换成Numopy格式输出\n",
    "for ele in dataset:\n",
    "    print(ele.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 建立dataset，使用二维列表建立\n",
    "dataset = tf.data.Dataset.from_tensor_slices([[1,2],[3,4],[5,6]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<TensorSliceDataset shapes: (2,), types: tf.int32>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset  #shape为每一个组件的shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 2]\n",
      "[3 4]\n",
      "[5 6]\n"
     ]
    }
   ],
   "source": [
    "for ele in dataset:\n",
    "    print(ele.numpy())  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 建立dataset，使用字典建立\n",
    "dataset = tf.data.Dataset.from_tensor_slices({'a':[1,2,3,4],\n",
    "                                              'b':[6,7,8,9],\n",
    "                                              'c':[12,13,14,15]\n",
    "                                             })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<TensorSliceDataset shapes: {a: (), b: (), c: ()}, types: {a: tf.int32, b: tf.int32, c: tf.int32}>"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'a': <tf.Tensor: shape=(), dtype=int32, numpy=1>, 'b': <tf.Tensor: shape=(), dtype=int32, numpy=6>, 'c': <tf.Tensor: shape=(), dtype=int32, numpy=12>}\n",
      "{'a': <tf.Tensor: shape=(), dtype=int32, numpy=2>, 'b': <tf.Tensor: shape=(), dtype=int32, numpy=7>, 'c': <tf.Tensor: shape=(), dtype=int32, numpy=13>}\n",
      "{'a': <tf.Tensor: shape=(), dtype=int32, numpy=3>, 'b': <tf.Tensor: shape=(), dtype=int32, numpy=8>, 'c': <tf.Tensor: shape=(), dtype=int32, numpy=14>}\n",
      "{'a': <tf.Tensor: shape=(), dtype=int32, numpy=4>, 'b': <tf.Tensor: shape=(), dtype=int32, numpy=9>, 'c': <tf.Tensor: shape=(), dtype=int32, numpy=15>}\n"
     ]
    }
   ],
   "source": [
    "for ele in dataset:\n",
    "    print(ele)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 使用Numpy来创建dataset\n",
    "dataset = tf.data.Dataset.from_tensor_slices(np.array([1,2,3,4,5,6,7]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n"
     ]
    }
   ],
   "source": [
    "for ele in dataset:\n",
    "    print(ele.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n",
      "3\n",
      "4\n"
     ]
    }
   ],
   "source": [
    "# 取出前4个数\n",
    "for ele in dataset.take(4):\n",
    "    print(ele.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tf.Tensor: shape=(), dtype=int32, numpy=1>"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "next(iter(dataset.take(1))) # 取第一个数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 乱序：全部乱序，参数为数据长度\n",
    "dataset = dataset.shuffle(7)\n",
    "# 重复：count指定重复次数\n",
    "dataset = dataset.repeat(count = 2)\n",
    "# 将数据分批次：默认参数为batch_size指定每个批次大小\n",
    "dataset = dataset.batch(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[5 4 6]\n",
      "[2 1 4]\n",
      "[2 7 3]\n",
      "[7 3 1]\n",
      "[6 5 4]\n",
      "[7 6 1]\n",
      "[6 3 3]\n",
      "[2 7 5]\n",
      "[1 4 2]\n",
      "[5]\n"
     ]
    }
   ],
   "source": [
    "for ele in dataset:\n",
    "    print(ele.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 使用函数---将每个数据都进行平方\n",
    "dataset = dataset.map(tf.square)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "4\n",
      "9\n",
      "16\n",
      "25\n",
      "36\n",
      "49\n"
     ]
    }
   ],
   "source": [
    "for ele in dataset:\n",
    "    print(ele.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
